Analysis Dependencies

library(ggplot2)  # (Wickham, 2016)
library(tidyr)    # (Wickham and Henry, 2020)
library(dplyr)    # (Wickham et al., 2020)
library(reshape2) # (Wickham, 2007)
library(cowplot)  # (Wilke, 2019)
library(hexbin)   # (Carr, Lewin-Kog, Maechler, and Sarkar, 2020)

We conducted these analyses using the following computing environment:

print(version)
##                _                           
## platform       x86_64-apple-darwin15.6.0   
## arch           x86_64                      
## os             darwin15.6.0                
## system         x86_64, darwin15.6.0        
## status                                     
## major          3                           
## minor          6.2                         
## year           2019                        
## month          12                          
## day            12                          
## svn rev        77560                       
## language       R                           
## version.string R version 3.6.2 (2019-12-12)
## nickname       Dark and Stormy Night

Setup

theme_set(theme_cowplot())
alpha <- 0.05

data_path <- "./data/agg_data.csv"
agg_data <- read.csv(data_path, na.strings="NONE")

agg_data$BIT_FLIP_PROB <- as.factor(agg_data$BIT_FLIP_PROB)
agg_data$DRIFT <- agg_data$TOURNAMENT_SIZE==1

# Label
chg_rate_label <- function(mag, interval, drift) {
  if (drift) { return("drift") }
  else if (interval == 0) { return("0") }
  else { return(paste(mag, interval, sep="/")) }
}

agg_data$chg_rate_label <- factor(mapply(chg_rate_label, 
                                            agg_data$CHANGE_MAGNITUDE,
                                            agg_data$CHANGE_FREQUENCY,
                                            agg_data$DRIFT),
                                     levels=c("drift", "0", "1/256", "1/128",
                                              "1/64", "1/32", "1/16", "1/8",
                                              "1/4", "1/2", "1/1", "2/1", 
                                              "4/1", "8/1", "16/1", "32/1",
                                              "64/1", "128/1", "256/1", 
                                              "512/1", "1024/1", "2048/1",
                                              "4096/1"))

# Divide up the data into convenient partitions
data_gradient_phase0 <- 
  filter(agg_data,
         GRADIENT_MODEL==1 & evo_phase == 0 & update == 50000)
data_gradient_phase1 <-
  filter(agg_data,
         GRADIENT_MODEL==1 & evo_phase == 1 & update == 60000)
data_nk_phase0 <-
  filter(agg_data,
         GRADIENT_MODEL==0 & evo_phase == 0 & update == 50000)
data_nk_phase1 <-
  filter(agg_data,
         GRADIENT_MODEL==0 & evo_phase == 1 & update == 60000)

Evolution Experiment

We evolved populations for 50,000 generations under a range of environmental change rates.

Fitnesses

Note issue with fitnesses for changing environments.

Gradient Fitness Model

ggplot(data_gradient_phase0,
       aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
  geom_boxplot() +
  xlab("Environmental Change") +
  scale_y_continuous(name="Fitness (of best organism)") +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none")

NK Fitness Model

ggplot(data_nk_phase0,
       aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
  geom_boxplot() +
  xlab("Env. bits flipped per generation") +
  scale_y_continuous(name="Fitness  (of best organism)") +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none")

Coding Sites

Gradient Fitness Model

ggplot(data_gradient_phase0,
       aes(x=chg_rate_label, y=coding_sites, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(aes(color=chg_rate_label), alpha=0.1) +
  xlab("Environmental Change") +
  scale_y_continuous(name="Coding Sites (in best genotype)",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none")

kt <- kruskal.test(formula = coding_sites ~ chg_rate_label, data=data_gradient_phase0)
kt
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by chg_rate_label
## Kruskal-Wallis chi-squared = 1170.2, df = 14, p-value < 2.2e-16
if (kt$p.value <= alpha) {
  wt <- pairwise.wilcox.test(x=data_gradient_phase0$coding_sites,
                             g=data_gradient_phase0$chg_rate_label,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
  wt
}
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  data_gradient_phase0$coding_sites and data_gradient_phase0$chg_rate_label 
## 
##       drift   0       1/256   1/128   1/64    1/32    1/16    1/8     1/4    
## 0     1.00000 -       -       -       -       -       -       -       -      
## 1/256 < 2e-16 < 2e-16 -       -       -       -       -       -       -      
## 1/128 < 2e-16 < 2e-16 1.2e-12 -       -       -       -       -       -      
## 1/64  < 2e-16 < 2e-16 < 2e-16 1.1e-11 -       -       -       -       -      
## 1/32  < 2e-16 < 2e-16 < 2e-16 < 2e-16 2.5e-06 -       -       -       -      
## 1/16  < 2e-16 < 2e-16 < 2e-16 < 2e-16 6.6e-06 1.00000 -       -       -      
## 1/8   < 2e-16 < 2e-16 < 2e-16 < 2e-16 2.0e-12 0.86240 0.95950 -       -      
## 1/4   < 2e-16 < 2e-16 < 2e-16 < 2e-16 9.6e-12 0.75912 1.00000 1.00000 -      
## 1/2   < 2e-16 < 2e-16 < 2e-16 < 2e-16 2.9e-05 1.00000 1.00000 1.00000 1.00000
## 1/1   < 2e-16 < 2e-16 < 2e-16 0.04590 0.03521 6.8e-11 1.9e-11 4.9e-16 1.4e-15
## 2/1   < 2e-16 < 2e-16 0.04265 0.00396 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 4/1   < 2e-16 < 2e-16 1.00000 1.8e-09 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 8/1   5.0e-11 < 2e-16 0.00559 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 16/1  0.00045 2.3e-07 2.6e-10 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##       1/2     1/1     2/1     4/1     8/1    
## 0     -       -       -       -       -      
## 1/256 -       -       -       -       -      
## 1/128 -       -       -       -       -      
## 1/64  -       -       -       -       -      
## 1/32  -       -       -       -       -      
## 1/16  -       -       -       -       -      
## 1/8   -       -       -       -       -      
## 1/4   -       -       -       -       -      
## 1/2   -       -       -       -       -      
## 1/1   7.1e-11 -       -       -       -      
## 2/1   < 2e-16 1.9e-09 -       -       -      
## 4/1   < 2e-16 < 2e-16 0.35140 -       -      
## 8/1   < 2e-16 < 2e-16 5.0e-08 0.04075 -      
## 16/1  < 2e-16 < 2e-16 2.6e-15 3.2e-08 0.07951
## 
## P value adjustment method: bonferroni

NK Fitness Model

ggplot(data_nk_phase0,
       aes(x=chg_rate_label, y=coding_sites, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  scale_y_continuous(name="# coding sites in best organism",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none")

kt <- kruskal.test(formula = coding_sites ~ chg_rate_label, data=data_nk_phase0)
kt
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coding_sites by chg_rate_label
## Kruskal-Wallis chi-squared = 1024.7, df = 14, p-value < 2.2e-16
if (kt$p.value <= alpha) {
  wt <- pairwise.wilcox.test(x=data_nk_phase0$coding_sites,
                             g=data_nk_phase0$chg_rate_label,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
  wt
}
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  data_nk_phase0$coding_sites and data_nk_phase0$chg_rate_label 
## 
##        drift   0       1/1     2/1     4/1     8/1     16/1    32/1    64/1   
## 0      1.00000 -       -       -       -       -       -       -       -      
## 1/1    < 2e-16 < 2e-16 -       -       -       -       -       -       -      
## 2/1    < 2e-16 < 2e-16 0.42528 -       -       -       -       -       -      
## 4/1    < 2e-16 < 2e-16 0.00176 1.00000 -       -       -       -       -      
## 8/1    < 2e-16 < 2e-16 2.5e-08 0.18676 1.00000 -       -       -       -      
## 16/1   < 2e-16 < 2e-16 5.6e-14 4.9e-05 0.00298 1.00000 -       -       -      
## 32/1   < 2e-16 < 2e-16 < 2e-16 2.6e-09 2.2e-07 0.00066 0.89884 -       -      
## 64/1   < 2e-16 < 2e-16 < 2e-16 4.6e-10 3.4e-08 0.00014 0.61880 1.00000 -      
## 128/1  < 2e-16 < 2e-16 < 2e-16 6.3e-10 3.9e-08 0.00023 0.86823 1.00000 1.00000
## 256/1  < 2e-16 < 2e-16 7.4e-08 0.27324 1.00000 1.00000 1.00000 0.00127 0.00033
## 512/1  < 2e-16 < 2e-16 1.00000 0.03631 0.00011 3.7e-09 2.1e-14 < 2e-16 < 2e-16
## 1024/1 5.8e-07 < 2e-16 7.1e-11 9.0e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 2048/1 1.00000 0.09977 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 4096/1 1.00000 0.00634 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##        128/1   256/1   512/1   1024/1  2048/1 
## 0      -       -       -       -       -      
## 1/1    -       -       -       -       -      
## 2/1    -       -       -       -       -      
## 4/1    -       -       -       -       -      
## 8/1    -       -       -       -       -      
## 16/1   -       -       -       -       -      
## 32/1   -       -       -       -       -      
## 64/1   -       -       -       -       -      
## 128/1  -       -       -       -       -      
## 256/1  0.00047 -       -       -       -      
## 512/1  < 2e-16 7.9e-09 -       -       -      
## 1024/1 < 2e-16 < 2e-16 1.5e-07 -       -      
## 2048/1 < 2e-16 < 2e-16 < 2e-16 0.00088 -      
## 4096/1 < 2e-16 < 2e-16 < 2e-16 5.1e-15 1.8e-05
## 
## P value adjustment method: bonferroni

Genome Length

Gradient Fitness Model

ggplot(data_gradient_phase0,
       aes(x=chg_rate_label, y=genome_length, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  ylab("Genome Length") +
  ylim(0, 1025) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none")

NK Fitness Model

ggplot(data_nk_phase0,
       aes(x=chg_rate_label, y=genome_length, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  ylab("Genome Length") +
  ylim(0, 1024) +
  ggtitle("NK Fitness Model") +
  theme(legend.position="none")

Neutral Sites

Gradient Fitness Model

ggplot(data_gradient_phase0,
       aes(x=chg_rate_label, y=neutral_sites, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  ylab("Neutral Sites") +
  ylim(-1, 1025) +
  ggtitle("Gradient Fitness Model") +
  theme(legend.position="none")

NK Fitness Model

ggplot(data_nk_phase0,
       aes(x=chg_rate_label, y=neutral_sites, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  ylab("Neutral Sites") +
  ylim(-1, 1025) + # jitter can jitter below 0
  ggtitle("NK Fitness Model") +
  theme(legend.position="none")

Transfer experiment

We lock-down genetic architectures and evolve for 10,000 generations in a random static environment.

Fitnesses

Gradient Fitness Model

ggplot(data_gradient_phase1,
       aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  scale_y_continuous(name="Fitness") +
  ggtitle("Gradient Fitness Model (transfer)") +
  theme(legend.position="none")

kt <- kruskal.test(formula = fitness ~ chg_rate_label, data=data_gradient_phase1)
kt
## 
##  Kruskal-Wallis rank sum test
## 
## data:  fitness by chg_rate_label
## Kruskal-Wallis chi-squared = 1085.3, df = 14, p-value < 2.2e-16
if (kt$p.value <= alpha) {
  wt <- pairwise.wilcox.test(x=data_gradient_phase1$fitness,
                             g=data_gradient_phase1$chg_rate_label,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
  wt
}
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  data_gradient_phase1$fitness and data_gradient_phase1$chg_rate_label 
## 
##       drift   0       1/256   1/128   1/64    1/32    1/16    1/8     1/4    
## 0     1.00000 -       -       -       -       -       -       -       -      
## 1/256 < 2e-16 < 2e-16 -       -       -       -       -       -       -      
## 1/128 < 2e-16 < 2e-16 1.9e-12 -       -       -       -       -       -      
## 1/64  < 2e-16 < 2e-16 < 2e-16 1.4e-09 -       -       -       -       -      
## 1/32  < 2e-16 < 2e-16 < 2e-16 < 2e-16 0.07555 -       -       -       -      
## 1/16  < 2e-16 < 2e-16 < 2e-16 < 2e-16 0.08220 1.00000 -       -       -      
## 1/8   < 2e-16 < 2e-16 < 2e-16 < 2e-16 0.00085 1.00000 1.00000 -       -      
## 1/4   < 2e-16 < 2e-16 < 2e-16 < 2e-16 3.2e-06 1.00000 1.00000 1.00000 -      
## 1/2   < 2e-16 < 2e-16 < 2e-16 3.4e-16 0.93758 1.00000 1.00000 1.00000 0.17046
## 1/1   < 2e-16 < 2e-16 2.8e-16 0.52506 0.03977 3.1e-08 3.8e-08 1.0e-10 5.7e-13
## 2/1   < 2e-16 < 2e-16 0.08140 0.02014 1.4e-14 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 4/1   < 2e-16 < 2e-16 1.00000 1.3e-07 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 8/1   5.6e-10 < 2e-16 0.14506 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 16/1  7.4e-05 3.5e-09 2.5e-05 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##       1/2     1/1     2/1     4/1     8/1    
## 0     -       -       -       -       -      
## 1/256 -       -       -       -       -      
## 1/128 -       -       -       -       -      
## 1/64  -       -       -       -       -      
## 1/32  -       -       -       -       -      
## 1/16  -       -       -       -       -      
## 1/8   -       -       -       -       -      
## 1/4   -       -       -       -       -      
## 1/2   -       -       -       -       -      
## 1/1   1.3e-06 -       -       -       -      
## 2/1   < 2e-16 4.1e-06 -       -       -      
## 4/1   < 2e-16 6.5e-12 1.00000 -       -      
## 8/1   < 2e-16 < 2e-16 2.3e-06 0.06897 -      
## 16/1  < 2e-16 < 2e-16 1.1e-10 2.1e-05 1.00000
## 
## P value adjustment method: bonferroni

NK Fitness Model

ggplot(data_nk_phase1,
       aes(x=chg_rate_label, y=fitness, color=chg_rate_label)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  scale_y_continuous(name="Fitness") +
  ggtitle("NK Fitness Model (transfer)") +
  theme(legend.position="none")

kt <- kruskal.test(formula = fitness ~ chg_rate_label, data=data_nk_phase1)
kt
## 
##  Kruskal-Wallis rank sum test
## 
## data:  fitness by chg_rate_label
## Kruskal-Wallis chi-squared = 852.15, df = 14, p-value < 2.2e-16
if (kt$p.value <= alpha) {
  wt <- pairwise.wilcox.test(x=data_nk_phase1$fitness,
                             g=data_nk_phase1$chg_rate_label,
                             exact=FALSE,
                             p.adjust.method = "bonferroni")
  wt
}
## 
##  Pairwise comparisons using Wilcoxon rank sum test 
## 
## data:  data_nk_phase1$fitness and data_nk_phase1$chg_rate_label 
## 
##        drift   0       1/1     2/1     4/1     8/1     16/1    32/1    64/1   
## 0      1.00000 -       -       -       -       -       -       -       -      
## 1/1    < 2e-16 < 2e-16 -       -       -       -       -       -       -      
## 2/1    < 2e-16 < 2e-16 1.00000 -       -       -       -       -       -      
## 4/1    < 2e-16 < 2e-16 0.22838 1.00000 -       -       -       -       -      
## 8/1    < 2e-16 < 2e-16 1.9e-05 0.14327 1.00000 -       -       -       -      
## 16/1   < 2e-16 < 2e-16 6.5e-06 0.12661 1.00000 1.00000 -       -       -      
## 32/1   < 2e-16 < 2e-16 8.3e-09 0.00061 0.03858 1.00000 1.00000 -       -      
## 64/1   < 2e-16 < 2e-16 5.2e-09 0.00034 0.01623 1.00000 1.00000 1.00000 -      
## 128/1  < 2e-16 < 2e-16 1.0e-08 0.00130 0.06766 1.00000 1.00000 1.00000 1.00000
## 256/1  < 2e-16 < 2e-16 0.00216 1.00000 1.00000 1.00000 1.00000 0.47849 0.19134
## 512/1  < 2e-16 < 2e-16 1.00000 0.68651 0.02012 5.5e-07 1.3e-07 2.5e-10 1.9e-10
## 1024/1 0.00023 2.0e-08 1.8e-08 2.6e-12 1.9e-13 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 2048/1 1.00000 1.00000 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
## 4096/1 1.00000 0.00394 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##        128/1   256/1   512/1   1024/1  2048/1 
## 0      -       -       -       -       -      
## 1/1    -       -       -       -       -      
## 2/1    -       -       -       -       -      
## 4/1    -       -       -       -       -      
## 8/1    -       -       -       -       -      
## 16/1   -       -       -       -       -      
## 32/1   -       -       -       -       -      
## 64/1   -       -       -       -       -      
## 128/1  -       -       -       -       -      
## 256/1  0.62334 -       -       -       -      
## 512/1  1.5e-10 8.7e-05 -       -       -      
## 1024/1 < 2e-16 < 2e-16 2.2e-06 -       -      
## 2048/1 < 2e-16 < 2e-16 1.2e-14 0.03109 -      
## 4096/1 < 2e-16 < 2e-16 < 2e-16 2.0e-11 0.00088
## 
## P value adjustment method: bonferroni

Fitness vs Number of Coding Sites

Gradient Fitness Model

ggplot(data_gradient_phase1,
       aes(x=coding_sites, y=fitness)) +
  geom_point(aes(color=chg_rate_label)) +
  xlab("Coding Sites") +
  ylab("Fitness") +
  theme(legend.position="top") +
  ggtitle("Gradient Fitness Model")

cor.test(x=data_gradient_phase1$coding_sites, 
         y=data_gradient_phase1$fitness,
         method="spearman",
         exact=FALSE)
## 
##  Spearman's rank correlation rho
## 
## data:  data_gradient_phase1$coding_sites and data_gradient_phase1$fitness
## S = 13381383, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9762109

NK Fitness Model

ggplot(data_nk_phase1,
       aes(x=coding_sites, y=fitness)) +
  geom_point(aes(color=chg_rate_label)) +
  xlab("Coding Sites") +
  ylab("Fitness") +
  theme(legend.position="top") +
  ggtitle("NK Fitness Model")

cor.test(x=data_nk_phase1$coding_sites, 
         y=data_nk_phase1$fitness,
         method="spearman",
         exact=FALSE)
## 
##  Spearman's rank correlation rho
## 
## data:  data_nk_phase1$coding_sites and data_nk_phase1$fitness
## S = 60781262, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.8919444

Publication Figures

Changing Environments Promote Modularity

ggplot(data_gradient_phase0,
       aes(x=chg_rate_label, y=coding_sites)) +
  geom_boxplot() +
  geom_jitter(alpha=0.1) +
  xlab("Environmental Change") +
  scale_y_continuous(name="Coding Sites\n(best genotype)",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  theme(legend.position="none") +
  ggsave("./imgs/coding-sites-v-chg-rate-gradient-phase0.pdf", width=10, height=7)

Modularity Promotes Evolvability

ggplot(data_gradient_phase1,
       aes(x=chg_rate_label, y=fitness)) +
  geom_boxplot() +
  xlab("Environmental Change") +
  scale_y_continuous(name="Fitness") +
  # ggtitle("Gradient Fitness Model (phase 2)") +
  theme(legend.position="none") + 
  ggsave("./imgs/fitness-v-chg-rate-gradient-phase1.pdf", width=10, height=7)

ggplot(data_gradient_phase1,
       aes(x=coding_sites, y=fitness)) +
  geom_hex(alpha=0.95) +
  scale_fill_continuous(type = "viridis", 
                        trans="log",
                        name="Count",
                        breaks=c(1, 10, 100)) +
  scale_x_continuous(name="Coding Sites",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  ylab("Fitness") + 
  ggsave("./imgs/coding-sites-v-fitness-gradient-phase1-hex.pdf", width=10, height=7)

ggplot(data_gradient_phase1,
       aes(x=coding_sites, y=fitness)) +
  geom_density_2d() +
  stat_density_2d(aes(fill = ..level..), 
                  geom = "polygon", 
                  colour="white") +
  geom_jitter(alpha=0.05) +
  scale_fill_continuous(type = "viridis", 
                        trans="log",
                        name="Count") +
  ylab("Fitness") +
  theme(legend.position = "none")

ggplot(data_gradient_phase1,
       aes(x=coding_sites, y=fitness)) +
  geom_point(alpha=0.95) +
  scale_x_continuous(name="Coding Sites",
                     limits=c(0, 130),
                     breaks=seq(0, 130, 20)) +
  ylab("Fitness")